{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "source": [
    "import pandas as pd\r\n",
    "import numpy as np\r\n",
    "import math\r\n",
    "from sklearn.model_selection import KFold\r\n",
    "import matplotlib.pylab as plt\r\n",
    "import lightgbm as lgb\r\n",
    "train_drop = pd.read_csv('./final/train_drop_dis.csv', engine='python')\r\n",
    "test_drop = pd.read_csv('./final/test_drop_dis.csv', engine='python')\r\n",
    "\r\n",
    "train_dataset = lgb.Dataset(train_drop.drop(columns='loan_status'), train_drop['loan_status'])\r\n",
    "test_dataset = lgb.Dataset(test_drop.drop(columns='loan_status'), test_drop['loan_status'])\r\n",
    "\r\n",
    "train_add = train_drop.copy()\r\n",
    "test_add = test_drop.copy()\r\n",
    "\r\n",
    "idx = train_drop['continuous_dti'].index[np.where(np.isnan(train_drop['continuous_dti']))]\r\n",
    "train_add['continuous_dti'][idx] = train_drop['continuous_dti'].mean()\r\n",
    "\r\n",
    "idx = test_drop['continuous_dti'].index[np.where(np.isnan(test_drop['continuous_dti']))]\r\n",
    "test_add['continuous_dti'][idx] = test_drop['continuous_dti'].mean()\r\n",
    "\r\n",
    "train_add['ann_inc_dti_cross'] = train_add['continuous_annual_inc'] * train_add['continuous_dti']\r\n",
    "test_add['ann_inc_dti_cross'] = test_add['continuous_annual_inc'] * test_add['continuous_dti']"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "D:\\AI\\MiniConda\\lib\\site-packages\\ipykernel_launcher.py:17: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "D:\\AI\\MiniConda\\lib\\site-packages\\ipykernel_launcher.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "source": [
    "from sklearn.metrics import accuracy_score\r\n",
    "params_gbdt = {'num_thread': 4, 'num_leaves': 12, 'metric': 'binary', 'objective': 'binary', 'boosting': 'gbdt', 'max_depth': '4', \\\r\n",
    "    'num_round': 3200, 'learning_rate': 0.01, 'feature_fraction': 0.8, 'bagging_fraction': 0.8}\r\n",
    "params_goss = {'num_thread': 4, 'num_leaves': 7, 'metric': 'binary', 'objective': 'binary', 'boosting': 'goss', 'max_depth': '4', \\\r\n",
    "    'num_round': 3600, 'learning_rate': 0.0075, 'feature_fraction': 0.8, 'bagging_fraction': 0.8}\r\n",
    "\r\n",
    "from ran_wang_lib import LGBOpt,LGBFitter\r\n",
    "fitter_gbdt = LGBFitter(label='loan_status')\r\n",
    "fitter_goss = LGBFitter(label='loan_status')\r\n",
    "kfold = KFold(n_splits=5)\r\n",
    "_,test_pred_gbdt,error_gbdt,models_gbdt = fitter_gbdt.train_k_fold(kfold, train_drop, test_drop, params = params_gbdt)\r\n",
    "_,test_pred_goss,error_goss,models_goss = fitter_goss.train_k_fold(kfold, train_add, test_add, params = params_goss)\r\n",
    "test_loan = np.array(test_add['loan_status'])\r\n",
    "print(params_gbdt)\r\n",
    "print('gbdt验证集各模型误差',error_gbdt)\r\n",
    "print('gbdt验证集平均误差',np.mean(error_gbdt))\r\n",
    "print(params_goss)\r\n",
    "print('goss验证集各模型误差',error_goss)\r\n",
    "print('goss验证集平均误差',np.mean(error_goss))\r\n",
    "test_pred = (test_pred_gbdt + test_pred_goss) / 2\r\n",
    "test_pred = (test_pred > 0.5).astype(int)\r\n",
    "print('gbdt测试集误差率',1 - accuracy_score(test_loan,(test_pred_gbdt>0.5).astype(int)))\r\n",
    "print('goss测试集误差率',1 - accuracy_score(test_loan,(test_pred_goss>0.5).astype(int)))\r\n",
    "print('平均模型测试集误差率',1 - accuracy_score(test_loan,test_pred))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "The minimum is attained in round 930\n",
      "Finished loading model, total used 3200 iterations\n",
      "The minimum is attained in round 1002\n",
      "Finished loading model, total used 3200 iterations\n",
      "The minimum is attained in round 883\n",
      "Finished loading model, total used 3200 iterations\n",
      "The minimum is attained in round 1351\n",
      "Finished loading model, total used 3200 iterations\n",
      "The minimum is attained in round 736\n",
      "Finished loading model, total used 3200 iterations\n",
      "The minimum is attained in round 2269\n",
      "Finished loading model, total used 3600 iterations\n",
      "The minimum is attained in round 1196\n",
      "Finished loading model, total used 3600 iterations\n",
      "The minimum is attained in round 2538\n",
      "Finished loading model, total used 3600 iterations\n",
      "The minimum is attained in round 2770\n",
      "Finished loading model, total used 3600 iterations\n",
      "The minimum is attained in round 1630\n",
      "Finished loading model, total used 3600 iterations\n",
      "{'num_thread': 4, 'num_leaves': 12, 'metric': 'binary', 'objective': 'binary', 'boosting': 'gbdt', 'max_depth': '4', 'num_round': 3200, 'learning_rate': 0.01, 'feature_fraction': 0.8, 'bagging_fraction': 0.8}\n",
      "gbdt验证集各模型误差 [0.07230000000000003, 0.08130000000000004, 0.08379999999999999, 0.08279999999999998, 0.07979999999999998]\n",
      "gbdt验证集平均误差 0.08\n",
      "{'num_thread': 4, 'num_leaves': 7, 'metric': 'binary', 'objective': 'binary', 'boosting': 'goss', 'max_depth': '4', 'num_round': 3600, 'learning_rate': 0.0075, 'feature_fraction': 0.8, 'bagging_fraction': 0.8}\n",
      "goss验证集各模型误差 [0.07269999999999999, 0.0806, 0.08420000000000005, 0.08389999999999997, 0.08040000000000003]\n",
      "goss验证集平均误差 0.08036000000000001\n",
      "gbdt测试集误差率 0.08116000000000001\n",
      "goss测试集误差率 0.08123999999999998\n",
      "平均模型测试集误差率 0.08113999999999999\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "source": [
    "lgb_params = {\r\n",
    "    'boosting_type': 'gbdt',\r\n",
    "    'objective': 'binary',\r\n",
    "    'num_leaves': 10,\r\n",
    "    'num_round': 3200,\r\n",
    "    'max_depth':4,\r\n",
    "    'learning_rate': 0.01,\r\n",
    "    'feature_fraction': 0.8,\r\n",
    "    'bagging_fraction': 0.8,\r\n",
    "}\r\n",
    "model = lgb.train(lgb_params, train_dataset)\r\n",
    "importance = model.feature_importance(importance_type='split')\r\n",
    "feature_name = model.feature_name()\r\n",
    "feature_importance = pd.DataFrame({\r\n",
    "  'feature_name':feature_name,'importance':importance} )\r\n",
    "feature_importance_desc =  feature_importance.sort_values(by=['importance'],ascending=[False])\r\n",
    "top_features = feature_importance_desc.head(30)\r\n",
    "top_features = top_features['feature_name']\r\n",
    "top_features = list(top_features)\r\n",
    "top_features.append('loan_status')\r\n",
    "train_top_features = train_drop[top_features]\r\n",
    "test_top_features = test_drop[top_features]"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[LightGBM] [Info] Number of positive: 39788, number of negative: 10212\n",
      "[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005697 seconds.\n",
      "You can set `force_col_wise=true` to remove the overhead.\n",
      "[LightGBM] [Info] Total Bins 2500\n",
      "[LightGBM] [Info] Number of data points in the train set: 50000, number of used features: 28\n",
      "[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.795760 -> initscore=1.360002\n",
      "[LightGBM] [Info] Start training from score 1.360002\n"
     ]
    },
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "D:\\AI\\MiniConda\\lib\\site-packages\\lightgbm\\engine.py:148: UserWarning: Found `num_round` in params. Will use it instead of argument\n",
      "  _log_warning(\"Found `{}` in params. Will use it instead of argument\".format(alias))\n"
     ]
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n",
      "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "source": [
    "TRAIN_IDX = train_drop.shape[0]\r\n",
    "x_train = np.array(train_top_features.drop(columns='loan_status'))\r\n",
    "y_train = np.array(train_top_features['loan_status'])\r\n",
    "x_test = np.array(test_top_features.drop(columns='loan_status'))\r\n",
    "y_test = np.array(test_top_features['loan_status'])"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "source": [
    "from encoder import discretize\r\n",
    "x_train_np, x_test_np = x_train, x_test\r\n",
    "y_train_np, y_test_np = y_train, y_test\r\n",
    "x = np.concatenate([x_train_np, x_test_np])\r\n",
    "x_dis, centroids = discretize(x)\r\n",
    "x_dis_train = x_dis[:TRAIN_IDX, :]\r\n",
    "x_dis_test = x_dis[TRAIN_IDX:,:]"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "source": [
    "import torch\r\n",
    "from neural_network import *\r\n",
    "nn_ckpt = torch.load('E:/JupyterNoteBook/Geek_ML/final_project/logs/default/version_12/checkpoints/epoch=4-step=7813.ckpt')\r\n",
    "training_module = TrainingModuleV2(x_dis, 16, 96, 0.5, 0.1, 10)\r\n",
    "training_module.load_state_dict(nn_ckpt['state_dict'])"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "D:\\AI\\MiniConda\\lib\\site-packages\\deprecate\\deprecation.py:115: LightningDeprecationWarning: The `Accuracy` was deprecated since v1.3.0 in favor of `torchmetrics.classification.accuracy.Accuracy`. It will be removed in v1.5.0.\n",
      "  stream(template_mgs % msg_args)\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "source": [
    "from sklearn.metrics import accuracy_score\r\n",
    "test_loan = test_drop['loan_status']\r\n",
    "nn_pred = training_module.eval()\r\n",
    "nn_pred = nn_pred(torch.LongTensor(x_dis_test)).detach().numpy()\r\n",
    "nn_pred = (nn_pred > 0.5).astype(int)\r\n",
    "1 - accuracy_score(test_loan,nn_pred)"
   ],
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.08509999999999995"
      ]
     },
     "metadata": {},
     "execution_count": 7
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "source": [
    "test_pred = np.zeros(nn_pred.shape)\r\n",
    "for i in range(len(test_pred)):\r\n",
    "    if (test_pred_gbdt[i] > 0.5 and test_pred_goss[i] > 0.5) or (test_pred_goss[i] >0.5 and nn_pred[i]>0.5) or (nn_pred[i] > 0.5 and test_pred_gbdt[i]>0.5):\r\n",
    "        test_pred[i] = 1\r\n",
    "test_pred = (test_pred).astype(int)\r\n",
    "print('平均模型测试集误差率',1 - accuracy_score(test_loan,test_pred))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "平均模型测试集误差率 0.08098000000000005\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "source": [
    "test_pred = (nn_pred + test_pred_gbdt + test_pred_goss) / 3\r\n",
    "test_pred = (test_pred > 0.5).astype(int)\r\n",
    "print('平均模型测试集误差率',1 - accuracy_score(test_loan,test_pred))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "平均模型测试集误差率 0.08496000000000004\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [],
   "outputs": [],
   "metadata": {}
  }
 ],
 "metadata": {
  "orig_nbformat": 4,
  "language_info": {
   "name": "python",
   "version": "3.7.6",
   "mimetype": "text/x-python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "pygments_lexer": "ipython3",
   "nbconvert_exporter": "python",
   "file_extension": ".py"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.7.6 64-bit ('tf2': conda)"
  },
  "interpreter": {
   "hash": "20412a29546dc5dc3251d3a421e4aba5890d5580acd78dc93db7ce5a06c5b04f"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}